if(!require("htmltools")) install.packages("htmltools")
if(!require("remotes")) install.packages("remotes")
if(!require("MultiEWCE")) remotes::install_github("neurogenomics/MutltiEWCE", dependencies = TRUE)
res <- MultiEWCE::prioritise_targets()
## Prioritising gene targets.
## Importing existing file: ... phenotype_to_genes.txt
## Adding term definitions.
## Adding level-3 ancestor to each HPO ID.
## Prioritised targets: step='start'
## - nrows: 435,204
## - phenotypes: 5,652
## - celltypes: 77
## Filtering @ q-value <= 0.05
## Prioritised targets: step='q_threshold'
## - nrows: 7,648
## - phenotypes: 2,580
## - celltypes: 77
## Filtering @ fold-change >= 1
## Prioritised targets: step='fold_threshold'
## - nrows: 7,648
## - phenotypes: 2,580
## - celltypes: 77
## Getting absolute ontology level for 2,580 HPO IDs.
## Annotating phenos with Onset.
## Importing existing file: ... phenotype.hpoa
## Importing existing file: ... phenotype.hpoa
## Prioritised targets: step='keep_onsets'
## - nrows: 7,489
## - phenotypes: 2,520
## - celltypes: 77
## Annotating phenos with Tiers.
## Prioritised targets: step='keep_tiers'
## - nrows: 722
## - phenotypes: 132
## - celltypes: 66
## Annotating phenos with Modifiers
## Prioritised targets: step='severity_threshold'
## - nrows: 645
## - phenotypes: 123
## - celltypes: 66
## - associated diseases: 11
## Annotating phenotype frequencies.
## Prioritised targets: step='pheno_frequency_threshold'
## - nrows: 564
## - phenotypes: 101
## - celltypes: 64
## - associated diseases: 11
## 33 / 64 of cell types kept.
## Prioritised targets: step='keep_celltypes'
## - nrows: 353
## - phenotypes: 86
## - celltypes: 33
## - associated diseases: 11
## Filtering by gene size.
## Converting phenos to GRanges.
## Importing existing file: ... phenotype_to_genes.txt
## Loading required namespace: ensembldb
## Gathering gene metadata
## Loading required namespace: EnsDb.Hsapiens.v75
## 217 / 3,601 genes kept.
## Prioritised targets: step='gene_size'
## - nrows: 2,740
## - phenotypes: 84
## - genes: 217
## Filtering by specificity_quantile.
## Filtering by mean_exp_quantile.
## Prioritised targets: step='keep_celltypes'
## - nrows: 321
## - phenotypes: 84
## - celltypes: 31
## - associated diseases: 10
## Prioritised targets: step='merge_genes'
## - nrows: 11,286
## - phenotypes: 83
## - celltypes: 32
## - associated diseases: 10
## - genes: 217
## Prioritised targets: step='keep_specificity_quantiles'
## - nrows: 307
## - phenotypes: 65
## - celltypes: 27
## - associated diseases: 10
## - genes: 62
## Prioritised targets: step='keep_mean_exp_quantiles'
## - nrows: 307
## - phenotypes: 65
## - celltypes: 27
## - associated diseases: 10
## - genes: 62
## Annotating gene frequencies.
## Importing existing file: ... genes_to_phenotype.txt
## Prioritised targets: step='gene_frequency_threshold'
## - nrows: 307
## - phenotypes: 59
## - celltypes: 27
## - associated diseases: 10
## - genes: 62
## Sorting rows.
## Prioritised targets: step='end'
## - nrows: 307
## - phenotypes: 59
## - celltypes: 27
## - associated diseases: 10
## - genes: 62
Here are the top gene targets based on the default filtering/sorting
criterion of prioritise_targets.
MultiEWCE::create_dt(res$top_targets)
## Loading required namespace: DT
Here is a report that shows how many phenotypes/celltypes/genes
remained after each step within the prioritise_targets
pipeline.
MultiEWCE::create_dt(res$report)
Generate a network from the top phenotype-celltype-gene associations.
vn_top <- MultiEWCE::prioritise_targets_network(top_targets = res$top_targets,
save_path = here::here("reports",
"top_targets_network.html"),
show_plot = FALSE)
## Loading required namespace: visNetwork
## Loading required namespace: igraph
## Creating network.
## Creating plot.
## Saving plot ==> /Users/schilder/Desktop/ewce/RareDiseasePrioritisation/reports/top_targets_network.html
# visNetwork::renderVisNetwork(vn_top$plot)
htmltools::includeHTML("https://github.com/neurogenomics/RareDiseasePrioritisation/raw/master/reports/top_targets_network.html")
df_agg <- MultiEWCE::agg_results(phenos = res$top_targets,
count_var = "CellType",
group_var = "Phenotype")
## Aggregating results by group_var='Phenotype'
## Adding HPO IDs.
## Importing existing file: ... phenotype_to_genes.txt
MultiEWCE::create_dt(df_agg)
Subset phenotypes to those included in intellectual disability, and are related to cognition.
df_intel <- res$top_targets[
disease_characteristic=="Intellectual disability" &
(!Phenotype %in% c("Choreoathetosis","Coma")),]
top_genes <- sort(table(df_intel$Gene),
decreasing = TRUE)
print(top_genes)
##
## SOX3 SOX2 FOXH1 POU3F4 RTL1 FOXG1 GSX2
## 12 10 7 7 6 4 4
## TUBB2A GPR88 PIGY PROP1 SLC18A3 ASCL1 MAB21L2
## 4 3 3 3 3 2 2
## ROM1 SIX6 SNORD116-1 SNORD118 HES7 HOXA2 RIPPLY2
## 2 2 2 2 1 1 1
## TRH
## 1
top_celltypes <- sort(table(unique(df_intel[,c("Phenotype","HPO_ID","CellType")])$CellType),
decreasing = TRUE)
print(top_celltypes)
##
## Excitatory neurons Inhibitory neurons Astrocytes ENS glia
## 4 4 3 3
## Ganglion cells Granule neurons Horizontal cells Oligodendrocytes
## 3 3 3 3
## Purkinje neurons Visceral neurons Amacrine cells Bipolar cells
## 3 2 1 1
## ENS neurons Photoreceptor cells
## 1 1
Now let’s lift some of the filters on phenotypes and cell types to recover a more extensive network.
res_all <- MultiEWCE::prioritise_targets(keep_onsets = NULL,
keep_tiers = NULL,
pheno_frequency_threshold = c(10,NA),
gene_frequency_threshold = c(10,NA),
severity_threshold = NULL,
keep_specificity_quantiles = seq(30,40),
keep_mean_exp_quantiles = seq(30,40))
## Prioritising gene targets.
## Importing existing file: ... phenotype_to_genes.txt
## Adding term definitions.
## Adding level-3 ancestor to each HPO ID.
## Prioritised targets: step='start'
## - nrows: 435,204
## - phenotypes: 5,652
## - celltypes: 77
## Filtering @ q-value <= 0.05
## Prioritised targets: step='q_threshold'
## - nrows: 7,648
## - phenotypes: 2,580
## - celltypes: 77
## Filtering @ fold-change >= 1
## Prioritised targets: step='fold_threshold'
## - nrows: 7,648
## - phenotypes: 2,580
## - celltypes: 77
## Getting absolute ontology level for 2,580 HPO IDs.
## Annotating phenos with Onset.
## Importing existing file: ... phenotype.hpoa
## Annotating phenos with Tiers.
## Annotating phenos with Modifiers
## Annotating phenotype frequencies.
## Prioritised targets: step='pheno_frequency_threshold'
## - nrows: 7,574
## - phenotypes: 2,543
## - celltypes: 76
## - associated diseases: 225
## 37 / 76 of cell types kept.
## Prioritised targets: step='keep_celltypes'
## - nrows: 3,855
## - phenotypes: 1,711
## - celltypes: 37
## - associated diseases: 177
## Filtering by gene size.
## Converting phenos to GRanges.
## Importing existing file: ... phenotype_to_genes.txt
## Gathering gene metadata
## 289 / 4,329 genes kept.
## Prioritised targets: step='gene_size'
## - nrows: 38,512
## - phenotypes: 1,485
## - genes: 289
## Filtering by specificity_quantile.
## Filtering by mean_exp_quantile.
## Prioritised targets: step='keep_celltypes'
## - nrows: 3,855
## - phenotypes: 1,711
## - celltypes: 37
## - associated diseases: 177
## Prioritised targets: step='merge_genes'
## - nrows: 114,402
## - phenotypes: 1,485
## - celltypes: 37
## - associated diseases: 171
## - genes: 289
## Prioritised targets: step='keep_specificity_quantiles'
## - nrows: 16,088
## - phenotypes: 1,247
## - celltypes: 37
## - associated diseases: 154
## - genes: 245
## Prioritised targets: step='keep_mean_exp_quantiles'
## - nrows: 16,088
## - phenotypes: 1,247
## - celltypes: 37
## - associated diseases: 154
## - genes: 245
## Annotating gene frequencies.
## Importing existing file: ... genes_to_phenotype.txt
## Prioritised targets: step='gene_frequency_threshold'
## - nrows: 16,296
## - phenotypes: 1,207
## - celltypes: 37
## - associated diseases: 149
## - genes: 245
## Sorting rows.
## Prioritised targets: step='end'
## - nrows: 16,296
## - phenotypes: 1,207
## - celltypes: 37
## - associated diseases: 149
## - genes: 245
Note that we set physics=FALSE to reduce the computation
that it takes to render the plot.
vn_all <- MultiEWCE::prioritise_targets_network(top_targets = res_all$top_targets,
save_path = here::here("reports",
"all_targets_network.html"),
physics = FALSE,
show_plot = FALSE)
## Creating network.
## Creating plot.
## Saving plot ==> /Users/schilder/Desktop/ewce/RareDiseasePrioritisation/reports/all_targets_network.html
htmltools::includeHTML("https://github.com/neurogenomics/RareDiseasePrioritisation/raw/master/reports/all_targets_network.html")
all_agg <- MultiEWCE::agg_results(phenos = res_all$top_targets,
count_var = "CellType",
group_var = "Phenotype")
## Aggregating results by group_var='Phenotype'
## Adding HPO IDs.
## Importing existing file: ... phenotype_to_genes.txt
MultiEWCE::create_dt(all_agg)
Get the phenotypes that were enriched in the greatest number of cell types.
#### All ontology level ####
head(sort(table(unique(res_all$top_targets[,c("Phenotype","CellType")])$Phenotype),
decreasing = TRUE))
##
## Abnormality of eye movement Intellectual disability
## 16 16
## Abnormal muscle physiology Abnormal muscle tone
## 15 15
## Abnormal nervous system physiology Abnormality of the musculature
## 15 15
#### Only lower ontology levels #####
all_targets <- HPOExplorer::add_ont_lvl(res_all$top_targets)
head(sort(table(unique(res_all$top_targets[ontLvl<=4,c("Phenotype","CellType")])$Phenotype),
decreasing = TRUE))
##
## Intellectual disability Hypotonia
## 16 15
## Neurodevelopmental abnormality Abnormal involuntary eye movements
## 15 14
## Neurological speech impairment Nystagmus
## 14 14
Get the cell types that were enriched in the greatest number of unique phenotypes.
head(sort(table(unique(res_all$top_targets[,c("Phenotype","CellType")])$CellType),
decreasing = TRUE))
##
## Excitatory neurons Antigen presenting cells Cardiomyocytes
## 219 186 167
## ENS glia Limbic system neurons Ganglion cells
## 162 153 148
Get the genes that were enriched in the greatest number of unique phenotypes.
head(sort(table(unique(res_all$top_targets[,c("Phenotype","Gene")])$Gene),
decreasing = TRUE))
##
## RMRP RNU4ATAC TMEM107 FOXH1 FOXG1 SOX2
## 190 186 157 152 148 146
Get the most common ancestors in the results.
ancestor_freq <- sort(table(res_all$top_targets$ancestor), decreasing = TRUE) |>
data.table::data.table() |>
`colnames<-`(c("HPO_ID","freq"))
ancestor_freq$Phenotype <- HPOExplorer::harmonise_phenotypes(phenotypes = ancestor_freq$HPO_ID)
MultiEWCE::create_dt(ancestor_freq)
utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MultiEWCE_0.1.3 remotes_2.4.2 htmltools_0.5.4
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 R.utils_2.12.2
## [3] tidyselect_1.2.0 RSQLite_2.2.20
## [5] AnnotationDbi_1.60.0 htmlwidgets_1.6.1
## [7] grid_4.2.1 BiocParallel_1.32.5
## [9] munsell_0.5.0 codetools_0.2-18
## [11] DT_0.27 colorspace_2.1-0
## [13] Biobase_2.58.0 filelock_1.0.2
## [15] knitr_1.42 rstudioapi_0.14
## [17] orthogene_1.4.1 stats4_4.2.1
## [19] SingleCellExperiment_1.20.0 ggsignif_0.6.4
## [21] MatrixGenerics_1.10.0 GenomeInfoDbData_1.2.9
## [23] bit64_4.0.5 rprojroot_2.0.3
## [25] coda_0.19-4 vctrs_0.5.2
## [27] treeio_1.22.0 generics_0.1.3
## [29] xfun_0.37 BiocFileCache_2.6.0
## [31] R6_2.5.1 GenomeInfoDb_1.34.8
## [33] pals_1.7 AnnotationFilter_1.22.0
## [35] bitops_1.0-7 cachem_1.0.6
## [37] gridGraphics_0.5-1 DelayedArray_0.24.0
## [39] assertthat_0.2.1 promises_1.2.0.1
## [41] BiocIO_1.8.0 scales_1.2.1
## [43] gtable_0.3.1 ontologyPlot_1.6
## [45] ensembldb_2.22.0 rlang_1.0.6
## [47] rtracklayer_1.58.0 rstatix_0.7.2
## [49] lazyeval_0.2.2 dichromat_2.0-0.1
## [51] broom_1.0.3 BiocManager_1.30.19
## [53] yaml_2.3.7 reshape2_1.4.4
## [55] HPOExplorer_0.99.3 abind_1.4-5
## [57] GenomicFeatures_1.50.4 ggnetwork_0.5.10
## [59] crosstalk_1.2.0 backports_1.4.1
## [61] httpuv_1.6.8 tools_4.2.1
## [63] ggplotify_0.1.0 statnet.common_4.8.0
## [65] ggplot2_3.4.0 ellipsis_0.3.2
## [67] jquerylib_0.1.4 paintmap_1.0
## [69] BiocGenerics_0.44.0 Rcpp_1.0.10
## [71] plyr_1.8.8 visNetwork_2.1.2
## [73] progress_1.2.2 zlibbioc_1.44.0
## [75] purrr_1.0.1 RCurl_1.98-1.10
## [77] prettyunits_1.1.1 ggpubr_0.5.0
## [79] S4Vectors_0.36.1 SummarizedExperiment_1.28.0
## [81] grr_0.9.5 here_1.0.1
## [83] magrittr_2.0.3 data.table_1.14.6
## [85] ProtGenerics_1.30.0 matrixStats_0.63.0
## [87] hms_1.1.2 patchwork_1.1.2
## [89] mime_0.12 evaluate_0.20
## [91] xtable_1.8-4 XML_3.99-0.13
## [93] EWCE_1.6.0 IRanges_2.32.0
## [95] compiler_4.2.1 biomaRt_2.54.0
## [97] tibble_3.1.8 maps_3.4.1
## [99] crayon_1.5.2 R.oo_1.25.0
## [101] ggfun_0.0.9 later_1.3.0
## [103] tidyr_1.3.0 aplot_0.1.9
## [105] DBI_1.1.3 ExperimentHub_2.6.0
## [107] gprofiler2_0.2.1 dbplyr_2.3.0
## [109] rappdirs_0.3.3 babelgene_22.9
## [111] EnsDb.Hsapiens.v75_2.99.0 Matrix_1.5-3
## [113] car_3.1-1 piggyback_0.1.4
## [115] cli_3.6.0 R.methodsS3_1.8.2
## [117] parallel_4.2.1 igraph_1.3.5
## [119] GenomicRanges_1.50.2 pkgconfig_2.0.3
## [121] GenomicAlignments_1.34.0 plotly_4.10.1
## [123] xml2_1.3.3 ggtree_3.6.2
## [125] bslib_0.4.2 XVector_0.38.0
## [127] yulab.utils_0.0.6 stringr_1.5.0
## [129] digest_0.6.31 graph_1.76.0
## [131] Biostrings_2.66.0 rmarkdown_2.20
## [133] HGNChelper_0.8.1 tidytree_0.4.2
## [135] restfulr_0.0.15 curl_5.0.0
## [137] shiny_1.7.4 Rsamtools_2.14.0
## [139] rjson_0.2.21 lifecycle_1.0.3
## [141] nlme_3.1-161 jsonlite_1.8.4
## [143] carData_3.0-5 network_1.18.1
## [145] mapproj_1.2.11 viridisLite_0.4.1
## [147] limma_3.54.1 fansi_1.0.4
## [149] pillar_1.8.1 ontologyIndex_2.10
## [151] lattice_0.20-45 homologene_1.4.68.19.3.27
## [153] KEGGREST_1.38.0 fastmap_1.1.0
## [155] httr_1.4.4 interactiveDisplayBase_1.36.0
## [157] glue_1.6.2 RNOmni_1.0.1
## [159] png_0.1-8 ewceData_1.6.0
## [161] BiocVersion_3.16.0 bit_4.0.5
## [163] Rgraphviz_2.42.0 stringi_1.7.12
## [165] sass_0.4.5 blob_1.2.3
## [167] AnnotationHub_3.6.0 memoise_2.0.1
## [169] dplyr_1.1.0 ape_5.6-2